Most thorough way to readdress differences between original and current analyses is to jsut redo the biomark. To that end, will restart analysis of just CMPm2, excluding other datasets (i.e., LSK, MEP, GMP). Will also keep
Creating new pipeline using seurat v4.0.2 available 2021.06.08
Load libraries required for Seuratv4
library(dplyr)
library(Seurat)
library(patchwork)
library(ggplot2)
library(clustree)
projectName <- "CMP"
jackstraw.dim <- 40
store session info
sessionInfo.filename <- paste0(projectName, "_sessionInfo.txt")
sink(sessionInfo.filename)
sessionInfo()
sink()
source("../RFunctions/read_10XGenomics_data.R")
source("../RFunctions/PercentVariance.R")
source("../RFunctions/Mouse2Human_idconversion.R")
source ("../RFunctions/ColorPalette.R")
setwd("../../cellRanger/") # temporarily changing wd only works if you run the entire chunk at once
data_file.list <- read_10XGenomics_data(sample.list = "CMPm2")
object.data <-Read10X(data_file.list)
cmp.object<- CreateSeuratObject(counts = object.data, min.cells = 3, min.genes = 200, project = projectName)
Clean up to free memory
remove(object.data)
Add mitochondrial metadata and plot some basic features
cmp.object[["percent.mt"]] <- PercentageFeatureSet(cmp.object, pattern = "^mt-")
VlnPlot(cmp.object, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0, fill.by = 'orig.ident', )
plot1 <- FeatureScatter(cmp.object, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by = "orig.ident", pt.size = 0.01)
plot2 <- FeatureScatter(cmp.object, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "orig.ident", pt.size = 0.01)
plot1 + plot2
remove low quality cells require: nFeature_RNA between 200 and 4000 (inclusive) require: percent.mt <=5
print(paste("original object:", nrow(cmp.object@meta.data), "cells", sep = " "))
[1] "original object: 12540 cells"
cmp.object <- subset(cmp.object,
subset = nFeature_RNA >=200 &
nFeature_RNA <= 4000 &
percent.mt <= 5
)
print(paste("new object:", nrow(cmp.object@meta.data), "cells", sep = " "))
[1] "new object: 12059 cells"
cmp.object <- NormalizeData(cmp.object, normalization.method = "LogNormalize", scale.factor = 10000)
Performing log-normalization
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Find variable features
cmp.object <- FindVariableFeatures(cmp.object, selection.method = "vst", nfeatures = 2000)
Calculating gene variances
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Calculating feature variances of standardized and clipped values
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
top10 <- head(VariableFeatures(cmp.object), 10)
plot1 <- VariableFeaturePlot(cmp.object)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
When using repel, set xnudge and ynudge to 0 for optimal results
plot1 + plot2
We don’t have to worry about comparing library depths, so we’ll just do normalization/Scale data
Regressing out nFeature_RNA, nCount_RNA
|
| | 0%
|
|= | 0%
|
|= | 1%
|
|== | 1%
|
|=== | 1%
|
|=== | 2%
|
|==== | 2%
|
|===== | 2%
|
|===== | 3%
|
|====== | 3%
|
|======= | 3%
|
|======= | 4%
|
|======== | 4%
|
|========= | 4%
|
|========= | 5%
|
|========== | 5%
|
|=========== | 5%
|
|=========== | 6%
|
|============ | 6%
|
|============= | 6%
|
|============== | 6%
|
|============== | 7%
|
|=============== | 7%
|
|================ | 7%
|
|================ | 8%
|
|================= | 8%
|
|================== | 8%
|
|================== | 9%
|
|=================== | 9%
|
|==================== | 9%
|
|==================== | 10%
|
|===================== | 10%
|
|====================== | 10%
|
|====================== | 11%
|
|======================= | 11%
|
|======================== | 11%
|
|======================== | 12%
|
|========================= | 12%
|
|========================== | 12%
|
|========================== | 13%
|
|=========================== | 13%
|
|============================ | 13%
|
|============================ | 14%
|
|============================= | 14%
|
|============================== | 14%
|
|============================== | 15%
|
|=============================== | 15%
|
|================================ | 15%
|
|================================ | 16%
|
|================================= | 16%
|
|================================== | 16%
|
|================================== | 17%
|
|=================================== | 17%
|
|==================================== | 17%
|
|==================================== | 18%
|
|===================================== | 18%
|
|====================================== | 18%
|
|====================================== | 19%
|
|======================================= | 19%
|
|======================================== | 19%
|
|========================================= | 19%
|
|========================================= | 20%
|
|========================================== | 20%
|
|=========================================== | 20%
|
|=========================================== | 21%
|
|============================================ | 21%
|
|============================================= | 21%
|
|============================================= | 22%
|
|============================================== | 22%
|
|=============================================== | 22%
|
|=============================================== | 23%
|
|================================================ | 23%
|
|================================================= | 23%
|
|================================================= | 24%
|
|================================================== | 24%
|
|=================================================== | 24%
|
|=================================================== | 25%
|
|==================================================== | 25%
|
|===================================================== | 25%
|
|===================================================== | 26%
|
|====================================================== | 26%
|
|======================================================= | 26%
|
|======================================================= | 27%
|
|======================================================== | 27%
|
|========================================================= | 27%
|
|========================================================= | 28%
|
|========================================================== | 28%
|
|=========================================================== | 28%
|
|=========================================================== | 29%
|
|============================================================ | 29%
|
|============================================================= | 29%
|
|============================================================= | 30%
|
|============================================================== | 30%
|
|=============================================================== | 30%
|
|=============================================================== | 31%
|
|================================================================ | 31%
|
|================================================================= | 31%
|
|================================================================== | 31%
|
|================================================================== | 32%
|
|=================================================================== | 32%
|
|==================================================================== | 32%
|
|==================================================================== | 33%
|
|===================================================================== | 33%
|
|====================================================================== | 33%
|
|====================================================================== | 34%
|
|======================================================================= | 34%
|
|======================================================================== | 34%
|
|======================================================================== | 35%
|
|========================================================================= | 35%
|
|========================================================================== | 35%
|
|========================================================================== | 36%
|
|=========================================================================== | 36%
|
|============================================================================ | 36%
|
|============================================================================ | 37%
|
|============================================================================= | 37%
|
|============================================================================== | 37%
|
|============================================================================== | 38%
|
|=============================================================================== | 38%
|
|================================================================================ | 38%
|
|================================================================================ | 39%
|
|================================================================================= | 39%
|
|================================================================================== | 39%
|
|================================================================================== | 40%
|
|=================================================================================== | 40%
|
|==================================================================================== | 40%
|
|==================================================================================== | 41%
|
|===================================================================================== | 41%
|
|====================================================================================== | 41%
|
|====================================================================================== | 42%
|
|======================================================================================= | 42%
|
|======================================================================================== | 42%
|
|======================================================================================== | 43%
|
|========================================================================================= | 43%
|
|========================================================================================== | 43%
|
|========================================================================================== | 44%
|
|=========================================================================================== | 44%
|
|============================================================================================ | 44%
|
|============================================================================================= | 44%
|
|============================================================================================= | 45%
|
|============================================================================================== | 45%
|
|=============================================================================================== | 45%
|
|=============================================================================================== | 46%
|
|================================================================================================ | 46%
|
|================================================================================================= | 46%
|
|================================================================================================= | 47%
|
|================================================================================================== | 47%
|
|=================================================================================================== | 47%
|
|=================================================================================================== | 48%
|
|==================================================================================================== | 48%
|
|===================================================================================================== | 48%
|
|===================================================================================================== | 49%
|
|====================================================================================================== | 49%
|
|======================================================================================================= | 49%
|
|======================================================================================================= | 50%
|
|======================================================================================================== | 50%
|
|========================================================================================================= | 50%
|
|========================================================================================================= | 51%
|
|========================================================================================================== | 51%
|
|=========================================================================================================== | 51%
|
|=========================================================================================================== | 52%
|
|============================================================================================================ | 52%
|
|============================================================================================================= | 52%
|
|============================================================================================================= | 53%
|
|============================================================================================================== | 53%
|
|=============================================================================================================== | 53%
|
|=============================================================================================================== | 54%
|
|================================================================================================================ | 54%
|
|================================================================================================================= | 54%
|
|================================================================================================================= | 55%
|
|================================================================================================================== | 55%
|
|=================================================================================================================== | 55%
|
|=================================================================================================================== | 56%
|
|==================================================================================================================== | 56%
|
|===================================================================================================================== | 56%
|
|====================================================================================================================== | 56%
|
|====================================================================================================================== | 57%
|
|======================================================================================================================= | 57%
|
|======================================================================================================================== | 57%
|
|======================================================================================================================== | 58%
|
|========================================================================================================================= | 58%
|
|========================================================================================================================== | 58%
|
|========================================================================================================================== | 59%
|
|=========================================================================================================================== | 59%
|
|============================================================================================================================ | 59%
|
|============================================================================================================================ | 60%
|
|============================================================================================================================= | 60%
|
|============================================================================================================================== | 60%
|
|============================================================================================================================== | 61%
|
|=============================================================================================================================== | 61%
|
|================================================================================================================================ | 61%
|
|================================================================================================================================ | 62%
|
|================================================================================================================================= | 62%
|
|================================================================================================================================== | 62%
|
|================================================================================================================================== | 63%
|
|=================================================================================================================================== | 63%
|
|==================================================================================================================================== | 63%
|
|==================================================================================================================================== | 64%
|
|===================================================================================================================================== | 64%
|
|====================================================================================================================================== | 64%
|
|====================================================================================================================================== | 65%
|
|======================================================================================================================================= | 65%
|
|======================================================================================================================================== | 65%
|
|======================================================================================================================================== | 66%
|
|========================================================================================================================================= | 66%
|
|========================================================================================================================================== | 66%
|
|========================================================================================================================================== | 67%
|
|=========================================================================================================================================== | 67%
|
|============================================================================================================================================ | 67%
|
|============================================================================================================================================ | 68%
|
|============================================================================================================================================= | 68%
|
|============================================================================================================================================== | 68%
|
|============================================================================================================================================== | 69%
|
|=============================================================================================================================================== | 69%
|
|================================================================================================================================================ | 69%
|
|================================================================================================================================================= | 69%
|
|================================================================================================================================================= | 70%
|
|================================================================================================================================================== | 70%
|
|=================================================================================================================================================== | 70%
|
|=================================================================================================================================================== | 71%
|
|==================================================================================================================================================== | 71%
|
|===================================================================================================================================================== | 71%
|
|===================================================================================================================================================== | 72%
|
|====================================================================================================================================================== | 72%
|
|======================================================================================================================================================= | 72%
|
|======================================================================================================================================================= | 73%
|
|======================================================================================================================================================== | 73%
|
|========================================================================================================================================================= | 73%
|
|========================================================================================================================================================= | 74%
|
|========================================================================================================================================================== | 74%
|
|=========================================================================================================================================================== | 74%
|
|=========================================================================================================================================================== | 75%
|
|============================================================================================================================================================ | 75%
|
|============================================================================================================================================================= | 75%
|
|============================================================================================================================================================= | 76%
|
|============================================================================================================================================================== | 76%
|
|=============================================================================================================================================================== | 76%
|
|=============================================================================================================================================================== | 77%
|
|================================================================================================================================================================ | 77%
|
|================================================================================================================================================================= | 77%
|
|================================================================================================================================================================= | 78%
|
|================================================================================================================================================================== | 78%
|
|=================================================================================================================================================================== | 78%
|
|=================================================================================================================================================================== | 79%
|
|==================================================================================================================================================================== | 79%
|
|===================================================================================================================================================================== | 79%
|
|===================================================================================================================================================================== | 80%
|
|====================================================================================================================================================================== | 80%
|
|======================================================================================================================================================================= | 80%
|
|======================================================================================================================================================================= | 81%
|
|======================================================================================================================================================================== | 81%
|
|========================================================================================================================================================================= | 81%
|
|========================================================================================================================================================================== | 81%
|
|========================================================================================================================================================================== | 82%
|
|=========================================================================================================================================================================== | 82%
|
|============================================================================================================================================================================ | 82%
|
|============================================================================================================================================================================ | 83%
|
|============================================================================================================================================================================= | 83%
|
|============================================================================================================================================================================== | 83%
|
|============================================================================================================================================================================== | 84%
|
|=============================================================================================================================================================================== | 84%
|
|================================================================================================================================================================================ | 84%
|
|================================================================================================================================================================================ | 85%
|
|================================================================================================================================================================================= | 85%
|
|================================================================================================================================================================================== | 85%
|
|================================================================================================================================================================================== | 86%
|
|=================================================================================================================================================================================== | 86%
|
|==================================================================================================================================================================================== | 86%
|
|==================================================================================================================================================================================== | 87%
|
|===================================================================================================================================================================================== | 87%
|
|====================================================================================================================================================================================== | 87%
|
|====================================================================================================================================================================================== | 88%
|
|======================================================================================================================================================================================= | 88%
|
|======================================================================================================================================================================================== | 88%
|
|======================================================================================================================================================================================== | 89%
|
|========================================================================================================================================================================================= | 89%
|
|========================================================================================================================================================================================== | 89%
|
|========================================================================================================================================================================================== | 90%
|
|=========================================================================================================================================================================================== | 90%
|
|============================================================================================================================================================================================ | 90%
|
|============================================================================================================================================================================================ | 91%
|
|============================================================================================================================================================================================= | 91%
|
|============================================================================================================================================================================================== | 91%
|
|============================================================================================================================================================================================== | 92%
|
|=============================================================================================================================================================================================== | 92%
|
|================================================================================================================================================================================================ | 92%
|
|================================================================================================================================================================================================ | 93%
|
|================================================================================================================================================================================================= | 93%
|
|================================================================================================================================================================================================== | 93%
|
|================================================================================================================================================================================================== | 94%
|
|=================================================================================================================================================================================================== | 94%
|
|==================================================================================================================================================================================================== | 94%
|
|===================================================================================================================================================================================================== | 94%
|
|===================================================================================================================================================================================================== | 95%
|
|====================================================================================================================================================================================================== | 95%
|
|======================================================================================================================================================================================================= | 95%
|
|======================================================================================================================================================================================================= | 96%
|
|======================================================================================================================================================================================================== | 96%
|
|========================================================================================================================================================================================================= | 96%
|
|========================================================================================================================================================================================================= | 97%
|
|========================================================================================================================================================================================================== | 97%
|
|=========================================================================================================================================================================================================== | 97%
|
|=========================================================================================================================================================================================================== | 98%
|
|============================================================================================================================================================================================================ | 98%
|
|============================================================================================================================================================================================================= | 98%
|
|============================================================================================================================================================================================================= | 99%
|
|============================================================================================================================================================================================================== | 99%
|
|=============================================================================================================================================================================================================== | 99%
|
|=============================================================================================================================================================================================================== | 100%
|
|================================================================================================================================================================================================================| 100%
Centering and scaling data matrix
|
| | 0%
|
|============ | 6%
|
|======================== | 12%
|
|===================================== | 18%
|
|================================================= | 24%
|
|============================================================= | 29%
|
|========================================================================= | 35%
|
|====================================================================================== | 41%
|
|================================================================================================== | 47%
|
|============================================================================================================== | 53%
|
|========================================================================================================================== | 59%
|
|======================================================================================================================================= | 65%
|
|=================================================================================================================================================== | 71%
|
|=============================================================================================================================================================== | 76%
|
|=========================================================================================================================================================================== | 82%
|
|======================================================================================================================================================================================== | 88%
|
|==================================================================================================================================================================================================== | 94%
|
|================================================================================================================================================================================================================| 100%
saveRDS(cmp.object, file = paste0(projectName, "_raw.RDS"))
cmp.object <- RunPCA(cmp.object, features = VariableFeatures(cmp.object), ndims.print = 1:5, nfeatures.print = 5)
PC_ 1
Positive: Vamp5, Nkg7, Car2, Apoe, Ctla2a
Negative: Lgals3, Aif1, Id2, Cst3, H2-Aa
PC_ 2
Positive: Prtn3, Ctsg, H2afy, Mpo, Emb
Negative: Ube2c, Cenpf, Nusap1, Mki67, Cenpa
PC_ 3
Positive: Pf4, Tmsb4x, Cavin2, Serpine2, Rap1b
Negative: Plac8, Cks2, Cenpa, Ube2c, Tubb4b
PC_ 4
Positive: Csrp3, Car1, Jun, Apoe, Jund
Negative: H2afz, Hmgn2, Birc5, Hmgb1, Tuba1b
PC_ 5
Positive: Pclaf, Tyms, Rrm2, Tk1, Pcna
Negative: Hist1h2bc, Tsc22d1, Smim14, Ccnb2, Serpinb1a
DimPlot(cmp.object, reduction = "pca", group.by = "orig.ident")
VizDimLoadings(cmp.object, dims = 1:6, nfeatures = 10, reduction = "pca", ncol = 3)
Calculate dimensionality
ElbowPlot(cmp.object, ndims = 50)
percent.variance(cmp.object@reductions$pca@stdev)
Number of PCs describing X% of variance
ElbowPlot(cmp.object, ndims = 50)
percent.variance(cmp.object@reductions$pca@stdev)
Exported cell IDs for clusters 3, 17, 10, 11 from Seurat v1. Will add these IDs as a metadata column.
Create column “clust.ID” and populate with 0’s. Then import IDs for clusters
tot.var <- percent.variance(cmp.object@reductions$pca@stdev, plot.var = FALSE, return.val = TRUE)
paste0("Num pcs for 80% variance:", length(which(cumsum(tot.var) <= 80)))
[1] "Num pcs for 80% variance:18"
paste0("Num pcs for 85% variance:", length(which(cumsum(tot.var) <= 85)))
[1] "Num pcs for 85% variance:25"
paste0("Num pcs for 90% variance:", length(which(cumsum(tot.var) <= 90)))
[1] "Num pcs for 90% variance:33"
paste0("Num pcs for 95% variance:", length(which(cumsum(tot.var) <= 95)))
[1] "Num pcs for 95% variance:41"
Add new metadata column
clust3.cells <- read.table(file = "../Seuratv1_clusterCellIDs/cluster3cellIDs.txt", col.names = "clust03")
clust3.cells <- sapply(clust3.cells, function(x) paste0(gsub("CMP", "CMPm2", x), "-1"))
clust17.cells <- read.table(file = "../Seuratv1_clusterCellIDs/cluster17cellIDs.txt", col.names = "clust17")
clust17.cells <- sapply(clust17.cells, function(x) paste0(gsub("CMP", "CMPm2", x), "-1"))
clust10.cells <- read.table(file = "../Seuratv1_clusterCellIDs/cluster10cellIDs.txt", col.names = "clust10")
clust10.cells <- sapply(clust10.cells, function(x) paste0(gsub("CMP", "CMPm2", x), "-1"))
clust11.cells <- read.table(file = "../Seuratv1_clusterCellIDs/cluster11cellIDs.txt", col.names = "clust11")
clust11.cells <- sapply(clust11.cells, function(x) paste0(gsub("CMP", "CMPm2", x), "-1"))
now map new ids
cmp.object@meta.data['clust.ID'] <- 0
head(cmp.object@meta.data)
Registered S3 method overwritten by 'cli':
method from
print.boxx spatstat.geom
do numbers make sense?
cmp.object@meta.data$clust.ID[rownames(cmp.object@meta.data) %in% clust3.cells] <- 3
cmp.object@meta.data$clust.ID[rownames(cmp.object@meta.data) %in% clust17.cells] <- 17
cmp.object@meta.data$clust.ID[rownames(cmp.object@meta.data) %in% clust10.cells] <- 10
cmp.object@meta.data$clust.ID[rownames(cmp.object@meta.data) %in% clust11.cells] <- 11
set total.var <- 90%
nrow(cmp.object@meta.data[cmp.object@meta.data$clust.ID == 10,])
[1] 1049
nrow(cmp.object@meta.data[cmp.object@meta.data$clust.ID == 11,])
[1] 1118
nrow(cmp.object@meta.data[cmp.object@meta.data$clust.ID == 17,])
[1] 883
nrow(cmp.object@meta.data[cmp.object@meta.data$clust.ID == 3,])
[1] 1931
Plot UMAP
tot.var <- percent.variance(cmp.object@reductions$pca@stdev, plot.var = FALSE, return.val = TRUE)
ndims <- length(which(cumsum(tot.var) <= 90))
cmp.object <- FindNeighbors(cmp.object, dims = 1:ndims)
Computing nearest neighbor graph
Computing SNN
cmp.object <- FindClusters(cmp.object, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 12059
Number of edges: 452999
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8502
Number of communities: 9
Elapsed time: 1 seconds
cmp.object <- RunUMAP(cmp.object, dims = 1: ndims)
14:08:12 UMAP embedding parameters a = 0.9922 b = 1.112
14:08:12 Read 12059 rows and found 33 numeric columns
14:08:12 Using Annoy for neighbor search, n_neighbors = 30
14:08:12 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
14:08:13 Writing NN index file to temp file /var/folders/4f/fwrj6fnn1dn4g8wsf0zv563hjsvl24/T//Rtmp5gGDJN/file582a2b2d9d6c
14:08:14 Searching Annoy index using 1 thread, search_k = 3000
14:08:16 Annoy recall = 100%
14:08:17 Commencing smooth kNN distance calibration using 1 thread
14:08:17 Initializing from normalized Laplacian + noise
14:08:18 Commencing optimization for 200 epochs, with 513294 positive edges
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
14:08:23 Optimization finished
for(x in c(0.5, 1, 1.5, 2, 2.5)){
cmp.object <- FindClusters(cmp.object, resolution = x)
}
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 12059
Number of edges: 452999
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8502
Number of communities: 9
Elapsed time: 1 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 12059
Number of edges: 452999
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.7974
Number of communities: 17
Elapsed time: 1 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 12059
Number of edges: 452999
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.7652
Number of communities: 22
Elapsed time: 1 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 12059
Number of edges: 452999
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.7404
Number of communities: 27
Elapsed time: 1 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 12059
Number of edges: 452999
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.7189
Number of communities: 30
Elapsed time: 1 seconds
saveRDS(cmp.object, file = paste0(projectName, "_dim", ndims, ".RDS"))
what’s the max resolution we can achieve while keepign clusters stable?
for (meta.col in colnames(cmp.object@meta.data)){
if(grepl(pattern = ("RNA_snn_res"), x = meta.col)==TRUE){
myplot <- DimPlot(cmp.object,
group.by = meta.col,
reduction = "umap",
cols = color.palette
) +
ggtitle(paste0(projectName, " dim", ndims, "res", gsub("RNA_snn_res", "", meta.col) ))
plot(myplot)
}
}
I think I’m liking res.1.0 from this. Although how much does this change if I use fewer PCs…
for each resolution, number/percentage of cells in each cluster?
plot.title <- paste0(projectName, "_clustree_ndim", max(cmp.object@commands$RunUMAP.RNA.pca$dims))
my.clustree <- clustree(cmp.object, prefix = "RNA_snn_res.", node_colour = "sc3_stability", exprs = "scale.data") +
scale_color_continuous(low = 'red3', high = 'white') +
ggtitle(plot.title)
png(filename = paste0(plot.title, ".png"), height = 800, width = 1600)
plot(my.clustree)
dev.off()
null device
1
For each resolution, what percentage of cells in each cluster are enriched for one of our clust.IDs?
Test: what percentage of each new clusterID matches one of the older clusters?
tot.cells <- nrow(cmp.object@meta.data)
for (meta.col in colnames(cmp.object@meta.data)){
if(grepl(pattern = ("RNA_snn_res"), x = meta.col)==TRUE){
new.clusters <- sort(as.numeric(levels(cmp.object@meta.data[[meta.col]])))
stats.df <- data.frame(matrix(ncol = 2, nrow = length(new.clusters)))
colnames(stats.df) <- c("num_cells", "pct_pop")
rownames(stats.df) <- new.clusters
meta.df <- cmp.object@meta.data
for(row.id in rownames(stats.df)){
num.x <- nrow(meta.df[meta.df[meta.col] == row.id,])
pct.x <- as.integer(num.x / tot.cells *100)
# print(pct.x)
stats.df[row.id, "num_cells"] <- num.x
stats.df[row.id, "pct_pop"] <- pct.x
}
print(stats.df)
}
}
Absolutely terrible overlap, no enrichment of any of these across the new clustering algorithm. Maybe should try 95% variation covered
time for the super scarey moment to see if the cells from seuratv1 still cluster together on in seurat v4
for (meta.col in colnames(cmp.object@meta.data)){
if(grepl(pattern = ("RNA_snn_res"), x = meta.col)==TRUE){
new.clusters <- sort(as.numeric(levels(cmp.object@meta.data[[meta.col]])))
enrich.df <- data.frame(matrix(ncol = 4, nrow = length(new.clusters)))
colnames(enrich.df) <- c(3, 17, 10, 11)
rownames(enrich.df) <- new.clusters
meta.df <- cmp.object@meta.data
for(row.id in rownames(enrich.df)){
tot.clus <- nrow(meta.df[meta.df[[meta.col]] == row.id,])
for(col.id in colnames(enrich.df)){
num.x <- nrow(meta.df[(meta.df[[meta.col]] == row.id) & (meta.df$clust.ID == col.id),])
pct.x <- as.integer(num.x / tot.clus *100)
# print(pct.x)
enrich.df[row.id, col.id] <- pct.x
}
}
colnames(enrich.df) <- sapply(colnames(enrich.df), function(x) paste0("oldcluster", x))
rownames(enrich.df) <- sapply(rownames(enrich.df), function(x) paste0("newcluster", x))
xlsx::write.xlsx(enrich.df, file = paste0("PctOfNewClustersOverlappingOldClusters_", projectName, "_dim", ndims, ".xlsx"), sheetName = paste0(gsub("RNA_snn_", "", meta.col)), append = TRUE)
print(enrich.df)
}
}
NA
DimPlot(cmp.object,
reduction = "umap",
group.by = "clust.ID",
# split.by = "orig.ident",
cols = c("gray", "orange", "blue", "red", "green"),)
set total.var <- 85%
DimPlot(cmp.object,
reduction = "umap",
group.by = "orig.ident",
split.by = "clust.ID",
cols = c("gray", "orange", "blue", "red", "green"),)
Plot UMAP
tot.var <- percent.variance(cmp.object@reductions$pca@stdev, plot.var = FALSE, return.val = TRUE)
ndims <- length(which(cumsum(tot.var) <= 85))
cmp.object <- FindNeighbors(cmp.object, dims = 1:ndims)
Computing nearest neighbor graph
Computing SNN
cmp.object <- FindClusters(cmp.object, resolution = 0.5)
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 12059
Number of edges: 424091
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8508
Number of communities: 9
Elapsed time: 1 seconds
cmp.object <- RunUMAP(cmp.object, dims = 1: ndims)
14:14:39 UMAP embedding parameters a = 0.9922 b = 1.112
14:14:39 Read 12059 rows and found 25 numeric columns
14:14:39 Using Annoy for neighbor search, n_neighbors = 30
14:14:39 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
14:14:40 Writing NN index file to temp file /var/folders/4f/fwrj6fnn1dn4g8wsf0zv563hjsvl24/T//Rtmp5gGDJN/file582a3fba3399
14:14:40 Searching Annoy index using 1 thread, search_k = 3000
14:14:42 Annoy recall = 100%
14:14:43 Commencing smooth kNN distance calibration using 1 thread
14:14:44 Initializing from normalized Laplacian + noise
14:14:44 Commencing optimization for 200 epochs, with 500658 positive edges
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
14:14:49 Optimization finished
for(x in c(0.5, 1, 1.5, 2, 2.5)){
cmp.object <- FindClusters(cmp.object, resolution = x)
saveRDS(cmp.object, file = paste0(projectName, "_dim", ndims, ".RDS"))
}
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 12059
Number of edges: 424091
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8508
Number of communities: 9
Elapsed time: 1 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 12059
Number of edges: 424091
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.8026
Number of communities: 18
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 12059
Number of edges: 424091
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.7700
Number of communities: 22
Elapsed time: 1 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 12059
Number of edges: 424091
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
Maximum modularity in 10 random starts: 0.7461
Number of communities: 28
Elapsed time: 1 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 12059
Number of edges: 424091
Running Louvain algorithm...
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
what’s the max resolution we can achieve while keepign clusters stable?
for (meta.col in colnames(cmp.object@meta.data)){
if(grepl(pattern = ("RNA_snn_res"), x = meta.col)==TRUE){
myplot <- DimPlot(cmp.object,
group.by = meta.col,
reduction = "umap",
cols = color.palette
) +
ggtitle(paste0(projectName, " dim", ndims, "res", gsub("RNA_snn_res", "", meta.col) ))
plot(myplot)
}
}
for each resolution, number/percentage of cells in each cluster?
plot.title <- paste0(projectName, "_clustree_ndim", max(cmp.object@commands$RunUMAP.RNA.pca$dims))
my.clustree <- clustree(cmp.object, prefix = "RNA_snn_res.", node_colour = "sc3_stability", exprs = "scale.data") +
scale_color_continuous(low = 'red3', high = 'white') +
ggtitle(plot.title)
png(filename = paste0(plot.title, ".png"), height = 800, width = 1600)
plot(my.clustree)
dev.off()
null device
1
length(cmp.object@assays$RNA@var.features)
set ident at res = 1 and get markers
length(cmp.object@assays$RNA@var.features)
[1] 2000
Idents(cmp.object) <- "RNA_snn_res.1"
cmp.allmarkers.res1 <- FindAllMarkers(cmp.object)
Calculating cluster 0
| | 0 % ~calculating
|+ | 1 % ~20s
|++ | 2 % ~20s
|++ | 3 % ~19s
|+++ | 4 % ~19s
|+++ | 5 % ~19s
|++++ | 6 % ~18s
|++++ | 7 % ~18s
|+++++ | 8 % ~18s
|+++++ | 9 % ~18s
|++++++ | 10% ~18s
|++++++ | 11% ~18s
|+++++++ | 12% ~17s
|+++++++ | 14% ~17s
|++++++++ | 15% ~17s
|++++++++ | 16% ~17s
|+++++++++ | 17% ~17s
|+++++++++ | 18% ~16s
|++++++++++ | 19% ~16s
|++++++++++ | 20% ~16s
|+++++++++++ | 21% ~16s
|+++++++++++ | 22% ~16s
|++++++++++++ | 23% ~16s
|++++++++++++ | 24% ~15s
|+++++++++++++ | 25% ~15s
|++++++++++++++ | 26% ~15s
|++++++++++++++ | 27% ~15s
|+++++++++++++++ | 28% ~15s
|+++++++++++++++ | 29% ~14s
|++++++++++++++++ | 30% ~14s
|++++++++++++++++ | 31% ~14s
|+++++++++++++++++ | 32% ~14s
|+++++++++++++++++ | 33% ~14s
|++++++++++++++++++ | 34% ~14s
|++++++++++++++++++ | 35% ~14s
|+++++++++++++++++++ | 36% ~14s
|+++++++++++++++++++ | 38% ~13s
|++++++++++++++++++++ | 39% ~13s
|++++++++++++++++++++ | 40% ~13s
|+++++++++++++++++++++ | 41% ~13s
|+++++++++++++++++++++ | 42% ~12s
|++++++++++++++++++++++ | 43% ~12s
|++++++++++++++++++++++ | 44% ~12s
|+++++++++++++++++++++++ | 45% ~12s
|+++++++++++++++++++++++ | 46% ~11s
|++++++++++++++++++++++++ | 47% ~11s
|++++++++++++++++++++++++ | 48% ~11s
|+++++++++++++++++++++++++ | 49% ~11s
|+++++++++++++++++++++++++ | 50% ~10s
|++++++++++++++++++++++++++ | 51% ~10s
|+++++++++++++++++++++++++++ | 52% ~10s
|+++++++++++++++++++++++++++ | 53% ~10s
|++++++++++++++++++++++++++++ | 54% ~10s
|++++++++++++++++++++++++++++ | 55% ~09s
|+++++++++++++++++++++++++++++ | 56% ~09s
|+++++++++++++++++++++++++++++ | 57% ~09s
|++++++++++++++++++++++++++++++ | 58% ~09s
|++++++++++++++++++++++++++++++ | 59% ~08s
|+++++++++++++++++++++++++++++++ | 60% ~08s
|+++++++++++++++++++++++++++++++ | 61% ~08s
|++++++++++++++++++++++++++++++++ | 62% ~08s
|++++++++++++++++++++++++++++++++ | 64% ~08s
|+++++++++++++++++++++++++++++++++ | 65% ~07s
|+++++++++++++++++++++++++++++++++ | 66% ~07s
|++++++++++++++++++++++++++++++++++ | 67% ~07s
|++++++++++++++++++++++++++++++++++ | 68% ~07s
|+++++++++++++++++++++++++++++++++++ | 69% ~07s
|+++++++++++++++++++++++++++++++++++ | 70% ~06s
|++++++++++++++++++++++++++++++++++++ | 71% ~06s
|++++++++++++++++++++++++++++++++++++ | 72% ~06s
|+++++++++++++++++++++++++++++++++++++ | 73% ~06s
|+++++++++++++++++++++++++++++++++++++ | 74% ~05s
|++++++++++++++++++++++++++++++++++++++ | 75% ~05s
|+++++++++++++++++++++++++++++++++++++++ | 76% ~05s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~05s
|++++++++++++++++++++++++++++++++++++++++ | 78% ~05s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~04s
|+++++++++++++++++++++++++++++++++++++++++ | 80% ~04s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~04s
|++++++++++++++++++++++++++++++++++++++++++ | 82% ~04s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~03s
|+++++++++++++++++++++++++++++++++++++++++++ | 84% ~03s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~03s
|++++++++++++++++++++++++++++++++++++++++++++ | 86% ~03s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~03s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~02s
|+++++++++++++++++++++++++++++++++++++++++++++ | 90% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~02s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~02s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=21s
Calculating cluster 1
| | 0 % ~calculating
|+ | 1 % ~08s
|++ | 2 % ~07s
|++ | 3 % ~07s
|+++ | 4 % ~07s
|+++ | 5 % ~07s
|++++ | 6 % ~07s
|++++ | 7 % ~07s
|+++++ | 8 % ~07s
|+++++ | 9 % ~07s
|++++++ | 10% ~07s
|++++++ | 11% ~06s
|+++++++ | 12% ~06s
|+++++++ | 13% ~06s
|++++++++ | 14% ~06s
|++++++++ | 15% ~06s
|+++++++++ | 16% ~06s
|+++++++++ | 17% ~06s
|++++++++++ | 18% ~06s
|++++++++++ | 19% ~06s
|+++++++++++ | 20% ~06s
|+++++++++++ | 21% ~06s
|++++++++++++ | 22% ~06s
|++++++++++++ | 23% ~05s
|+++++++++++++ | 24% ~05s
|+++++++++++++ | 25% ~05s
|++++++++++++++ | 26% ~06s
|++++++++++++++ | 27% ~06s
|+++++++++++++++ | 28% ~05s
|+++++++++++++++ | 29% ~05s
|++++++++++++++++ | 30% ~05s
|++++++++++++++++ | 31% ~05s
|+++++++++++++++++ | 32% ~05s
|+++++++++++++++++ | 33% ~05s
|++++++++++++++++++ | 34% ~05s
|++++++++++++++++++ | 35% ~05s
|+++++++++++++++++++ | 36% ~05s
|+++++++++++++++++++ | 37% ~05s
|++++++++++++++++++++ | 38% ~05s
|++++++++++++++++++++ | 39% ~05s
|+++++++++++++++++++++ | 40% ~05s
|+++++++++++++++++++++ | 41% ~04s
|++++++++++++++++++++++ | 42% ~04s
|++++++++++++++++++++++ | 43% ~04s
|+++++++++++++++++++++++ | 44% ~04s
|+++++++++++++++++++++++ | 45% ~04s
|++++++++++++++++++++++++ | 46% ~04s
|++++++++++++++++++++++++ | 47% ~04s
|+++++++++++++++++++++++++ | 48% ~04s
|+++++++++++++++++++++++++ | 49% ~04s
|++++++++++++++++++++++++++ | 51% ~04s
|++++++++++++++++++++++++++ | 52% ~04s
|+++++++++++++++++++++++++++ | 53% ~04s
|+++++++++++++++++++++++++++ | 54% ~03s
|++++++++++++++++++++++++++++ | 55% ~03s
|++++++++++++++++++++++++++++ | 56% ~03s
|+++++++++++++++++++++++++++++ | 57% ~03s
|+++++++++++++++++++++++++++++ | 58% ~03s
|++++++++++++++++++++++++++++++ | 59% ~03s
|++++++++++++++++++++++++++++++ | 60% ~03s
|+++++++++++++++++++++++++++++++ | 61% ~03s
|+++++++++++++++++++++++++++++++ | 62% ~03s
|++++++++++++++++++++++++++++++++ | 63% ~03s
|++++++++++++++++++++++++++++++++ | 64% ~03s
|+++++++++++++++++++++++++++++++++ | 65% ~03s
|+++++++++++++++++++++++++++++++++ | 66% ~02s
|++++++++++++++++++++++++++++++++++ | 67% ~02s
|++++++++++++++++++++++++++++++++++ | 68% ~02s
|+++++++++++++++++++++++++++++++++++ | 69% ~02s
|+++++++++++++++++++++++++++++++++++ | 70% ~02s
|++++++++++++++++++++++++++++++++++++ | 71% ~02s
|++++++++++++++++++++++++++++++++++++ | 72% ~02s
|+++++++++++++++++++++++++++++++++++++ | 73% ~02s
|+++++++++++++++++++++++++++++++++++++ | 74% ~02s
|++++++++++++++++++++++++++++++++++++++ | 75% ~02s
|++++++++++++++++++++++++++++++++++++++ | 76% ~02s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~02s
|+++++++++++++++++++++++++++++++++++++++ | 78% ~02s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~02s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~02s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 82% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 84% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 86% ~01s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~01s
|++++++++++++++++++++++++++++++++++++++++++++ | 88% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++ | 90% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=07s
Calculating cluster 2
| | 0 % ~calculating
|+ | 1 % ~28s
|++ | 2 % ~27s
|++ | 3 % ~26s
|+++ | 4 % ~26s
|+++ | 5 % ~25s
|++++ | 6 % ~25s
|++++ | 7 % ~25s
|+++++ | 9 % ~25s
|+++++ | 10% ~26s
|++++++ | 11% ~25s
|++++++ | 12% ~25s
|+++++++ | 13% ~25s
|+++++++ | 14% ~24s
|++++++++ | 15% ~24s
|++++++++ | 16% ~23s
|+++++++++ | 17% ~23s
|++++++++++ | 18% ~23s
|++++++++++ | 19% ~22s
|+++++++++++ | 20% ~22s
|+++++++++++ | 21% ~22s
|++++++++++++ | 22% ~21s
|++++++++++++ | 23% ~21s
|+++++++++++++ | 24% ~21s
|+++++++++++++ | 26% ~20s
|++++++++++++++ | 27% ~20s
|++++++++++++++ | 28% ~20s
|+++++++++++++++ | 29% ~19s
|+++++++++++++++ | 30% ~19s
|++++++++++++++++ | 31% ~19s
|++++++++++++++++ | 32% ~18s
|+++++++++++++++++ | 33% ~18s
|++++++++++++++++++ | 34% ~18s
|++++++++++++++++++ | 35% ~18s
|+++++++++++++++++++ | 36% ~17s
|+++++++++++++++++++ | 37% ~17s
|++++++++++++++++++++ | 38% ~17s
|++++++++++++++++++++ | 39% ~16s
|+++++++++++++++++++++ | 40% ~16s
|+++++++++++++++++++++ | 41% ~16s
|++++++++++++++++++++++ | 43% ~16s
|++++++++++++++++++++++ | 44% ~15s
|+++++++++++++++++++++++ | 45% ~15s
|+++++++++++++++++++++++ | 46% ~15s
|++++++++++++++++++++++++ | 47% ~14s
|++++++++++++++++++++++++ | 48% ~14s
|+++++++++++++++++++++++++ | 49% ~14s
|+++++++++++++++++++++++++ | 50% ~13s
|++++++++++++++++++++++++++ | 51% ~13s
|+++++++++++++++++++++++++++ | 52% ~13s
|+++++++++++++++++++++++++++ | 53% ~13s
|++++++++++++++++++++++++++++ | 54% ~12s
|++++++++++++++++++++++++++++ | 55% ~12s
|+++++++++++++++++++++++++++++ | 56% ~12s
|+++++++++++++++++++++++++++++ | 57% ~11s
|++++++++++++++++++++++++++++++ | 59% ~11s
|++++++++++++++++++++++++++++++ | 60% ~11s
|+++++++++++++++++++++++++++++++ | 61% ~11s
|+++++++++++++++++++++++++++++++ | 62% ~10s
|++++++++++++++++++++++++++++++++ | 63% ~10s
|++++++++++++++++++++++++++++++++ | 64% ~10s
|+++++++++++++++++++++++++++++++++ | 65% ~10s
|+++++++++++++++++++++++++++++++++ | 66% ~09s
|++++++++++++++++++++++++++++++++++ | 67% ~09s
|+++++++++++++++++++++++++++++++++++ | 68% ~09s
|+++++++++++++++++++++++++++++++++++ | 69% ~08s
|++++++++++++++++++++++++++++++++++++ | 70% ~08s
|++++++++++++++++++++++++++++++++++++ | 71% ~08s
|+++++++++++++++++++++++++++++++++++++ | 72% ~07s
|+++++++++++++++++++++++++++++++++++++ | 73% ~07s
|++++++++++++++++++++++++++++++++++++++ | 74% ~07s
|++++++++++++++++++++++++++++++++++++++ | 76% ~07s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~06s
|+++++++++++++++++++++++++++++++++++++++ | 78% ~06s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~06s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~05s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~05s
|+++++++++++++++++++++++++++++++++++++++++ | 82% ~05s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~05s
|+++++++++++++++++++++++++++++++++++++++++++ | 84% ~04s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~04s
|++++++++++++++++++++++++++++++++++++++++++++ | 86% ~04s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~03s
|+++++++++++++++++++++++++++++++++++++++++++++ | 88% ~03s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~03s
|++++++++++++++++++++++++++++++++++++++++++++++ | 90% ~03s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~02s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~02s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~02s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=27s
Calculating cluster 3
| | 0 % ~calculating
|+ | 1 % ~09s
|++ | 2 % ~08s
|++ | 3 % ~08s
|+++ | 4 % ~07s
|+++ | 5 % ~07s
|++++ | 6 % ~07s
|++++ | 7 % ~07s
|+++++ | 8 % ~07s
|+++++ | 9 % ~07s
|++++++ | 11% ~07s
|++++++ | 12% ~07s
|+++++++ | 13% ~06s
|+++++++ | 14% ~06s
|++++++++ | 15% ~06s
|++++++++ | 16% ~06s
|+++++++++ | 17% ~06s
|+++++++++ | 18% ~06s
|++++++++++ | 19% ~06s
|++++++++++ | 20% ~06s
|+++++++++++ | 21% ~06s
|++++++++++++ | 22% ~06s
|++++++++++++ | 23% ~06s
|+++++++++++++ | 24% ~06s
|+++++++++++++ | 25% ~06s
|++++++++++++++ | 26% ~05s
|++++++++++++++ | 27% ~05s
|+++++++++++++++ | 28% ~05s
|+++++++++++++++ | 29% ~05s
|++++++++++++++++ | 31% ~05s
|++++++++++++++++ | 32% ~05s
|+++++++++++++++++ | 33% ~05s
|+++++++++++++++++ | 34% ~05s
|++++++++++++++++++ | 35% ~05s
|++++++++++++++++++ | 36% ~05s
|+++++++++++++++++++ | 37% ~05s
|+++++++++++++++++++ | 38% ~05s
|++++++++++++++++++++ | 39% ~04s
|++++++++++++++++++++ | 40% ~04s
|+++++++++++++++++++++ | 41% ~04s
|++++++++++++++++++++++ | 42% ~04s
|++++++++++++++++++++++ | 43% ~04s
|+++++++++++++++++++++++ | 44% ~04s
|+++++++++++++++++++++++ | 45% ~04s
|++++++++++++++++++++++++ | 46% ~04s
|++++++++++++++++++++++++ | 47% ~04s
|+++++++++++++++++++++++++ | 48% ~04s
|+++++++++++++++++++++++++ | 49% ~04s
|++++++++++++++++++++++++++ | 51% ~04s
|++++++++++++++++++++++++++ | 52% ~04s
|+++++++++++++++++++++++++++ | 53% ~03s
|+++++++++++++++++++++++++++ | 54% ~03s
|++++++++++++++++++++++++++++ | 55% ~03s
|++++++++++++++++++++++++++++ | 56% ~03s
|+++++++++++++++++++++++++++++ | 57% ~03s
|+++++++++++++++++++++++++++++ | 58% ~03s
|++++++++++++++++++++++++++++++ | 59% ~03s
|++++++++++++++++++++++++++++++ | 60% ~03s
|+++++++++++++++++++++++++++++++ | 61% ~03s
|++++++++++++++++++++++++++++++++ | 62% ~03s
|++++++++++++++++++++++++++++++++ | 63% ~03s
|+++++++++++++++++++++++++++++++++ | 64% ~03s
|+++++++++++++++++++++++++++++++++ | 65% ~03s
|++++++++++++++++++++++++++++++++++ | 66% ~02s
|++++++++++++++++++++++++++++++++++ | 67% ~02s
|+++++++++++++++++++++++++++++++++++ | 68% ~02s
|+++++++++++++++++++++++++++++++++++ | 69% ~02s
|++++++++++++++++++++++++++++++++++++ | 71% ~02s
|++++++++++++++++++++++++++++++++++++ | 72% ~02s
|+++++++++++++++++++++++++++++++++++++ | 73% ~02s
|+++++++++++++++++++++++++++++++++++++ | 74% ~02s
|++++++++++++++++++++++++++++++++++++++ | 75% ~02s
|++++++++++++++++++++++++++++++++++++++ | 76% ~02s
|+++++++++++++++++++++++++++++++++++++++ | 77% ~02s
|+++++++++++++++++++++++++++++++++++++++ | 78% ~02s
|++++++++++++++++++++++++++++++++++++++++ | 79% ~02s
|++++++++++++++++++++++++++++++++++++++++ | 80% ~01s
|+++++++++++++++++++++++++++++++++++++++++ | 81% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 82% ~01s
|++++++++++++++++++++++++++++++++++++++++++ | 83% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 84% ~01s
|+++++++++++++++++++++++++++++++++++++++++++ | 85% ~01s
|++++++++++++++++++++++++++++++++++++++++++++ | 86% ~01s
|++++++++++++++++++++++++++++++++++++++++++++ | 87% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++ | 88% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++ | 89% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++ | 91% ~01s
|++++++++++++++++++++++++++++++++++++++++++++++ | 92% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 93% ~01s
|+++++++++++++++++++++++++++++++++++++++++++++++ | 94% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 95% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++ | 96% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 97% ~00s
|+++++++++++++++++++++++++++++++++++++++++++++++++ | 98% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 99% ~00s
|++++++++++++++++++++++++++++++++++++++++++++++++++| 100% elapsed=07s
Calculating cluster 4
| | 0 % ~calculating
|+ | 1 % ~09s
|++ | 2 % ~09s
|++ | 3 % ~09s
|+++ | 5 % ~09s
|+++ | 6 % ~08s
|++++ | 7 % ~08s
|+++++ | 8 % ~10s
|+++++ | 9 % ~10s
|++++++ | 10% ~10s
|++++++ | 11% ~09s
|+++++++ | 13% ~09s
|+++++++ | 14% ~09s
|++++++++ | 15% ~09s
|+++++++++ | 16% ~09s
|+++++++++ | 17% ~08s
|++++++++++ | 18% ~08s
|++++++++++ | 20% ~08s
|+++++++++++ | 21% ~08s
|+++++++++++ | 22% ~08s
|++++++++++++ | 23% ~08s
|+++++++++++++ | 24% ~08s
|+++++++++++++ | 25% ~07s
|++++++++++++++ | 26% ~07s
|++++++++++++++ | 28% ~07s
Find top.var genes in cmp.top100markers.res1
cmp.top100markers.res1 <- cmp.allmarkers.res1 %>% group_by(cluster) %>% top_n(n = 100, wt = abs(avg_log2FC))
cmp.top100markers.res1 <- cmp.top100markers.res1[cmp.top100markers.res1$p_val_adj <= 0.05, ]
for(cluster in unique(cmp.top100markers.res1$cluster)){
num.hits <- nrow(cmp.top100markers.res1[cmp.top100markers.res1$cluster == cluster, ])
print(paste("cluster", cluster, "has", num.hits, "genes"))
}
[1] "cluster 0 has 99 genes"
[1] "cluster 1 has 99 genes"
[1] "cluster 2 has 99 genes"
[1] "cluster 3 has 99 genes"
[1] "cluster 4 has 97 genes"
[1] "cluster 5 has 98 genes"
[1] "cluster 6 has 98 genes"
[1] "cluster 7 has 96 genes"
[1] "cluster 8 has 99 genes"
[1] "cluster 9 has 97 genes"
[1] "cluster 10 has 100 genes"
[1] "cluster 11 has 98 genes"
[1] "cluster 12 has 97 genes"
[1] "cluster 13 has 99 genes"
[1] "cluster 14 has 2 genes"
[1] "cluster 15 has 98 genes"
[1] "cluster 16 has 98 genes"
[1] "cluster 17 has 96 genes"
For each resolution, what percentage of cells in each cluster are enriched for one of our clust.IDs?
Test: what percentage of each new clusterID matches one of the older clusters?
top.var <- cmp.object@assays$RNA@var.features[1:150]
# top.var
n.vargenes <- c()
for(gene in top.var){
if(gene %in% cmp.top100markers.res1$gene){
n.vargenes <- c(n.vargenes, gene)
}
}
print(length(n.vargenes))
[1] 105
top.var[!(top.var %in% n.vargenes)]
[1] "Ccl5" "Hspa1a" "Ppbp" "Gm26737" "Olfr1369-ps1" "Ms4a3" "Ly6d" "Gm26885" "Olfr655" "Ifit1" "Trbc1" "Lgi1"
[13] "Mt2" "Cd209a" "Dqx1" "Retn" "Prss34" "S100a8" "Txndc16" "Gm17657" "B3galt2" "Tcf7" "Wnt2b" "Trbc2"
[25] "S100a9" "Xlr" "Ccl6" "S100a2" "Tc2n" "Clec1b" "Trp53inp2" "Gm42418" "Isg15" "Ly6k" "Gm50255" "Gm27252"
[37] "Chil3" "Iigp1" "Cxcl2" "Rsad2" "Hoga1" "Rpl39l" "B230312C02Rik" "Xcr1" "Rtp4"
Absolutely terrible overlap, no enrichment of any of these across the new clustering algorithm. Maybe should try 95% variation covered
time for the super scarey moment to see if the cells from seuratv1 still cluster together on in seurat v4
DimPlot(cmp.object,
reduction = "umap",
group.by = "clust.ID",
# split.by = "orig.ident",
cols = c("gray", "orange", "blue", "red", "green"),)
DimPlot(cmp.object,
reduction = "umap",
group.by = "orig.ident",
split.by = "clust.ID",
cols = c("gray", "orange", "blue", "red", "green"),)
set total.var <- 80%
tot.var <- percent.variance(cmp.object@reductions$pca@stdev, plot.var = FALSE, return.val = TRUE)
ndims <- length(which(cumsum(tot.var) <= 80))
cmp.object <- FindNeighbors(cmp.object, dims = 1:ndims)
cmp.object <- FindClusters(cmp.object, resolution = 0.5)
cmp.object <- RunUMAP(cmp.object, dims = 1: ndims)
Plot UMAP
for(x in c(0.5, 1, 1.5, 2, 2.5)){
cmp.object <- FindClusters(cmp.object, resolution = x)
saveRDS(cmp.object, file = paste0(projectName, "_dim", ndims, ".RDS"))
}
for (meta.col in colnames(cmp.object@meta.data)){
if(grepl(pattern = ("RNA_snn_res"), x = meta.col)==TRUE){
myplot <- DimPlot(cmp.object,
group.by = meta.col,
reduction = "umap",
cols = color.palette
) +
ggtitle(paste0(projectName, " dim", ndims, "res", gsub("RNA_snn_res", "", meta.col) ))
plot(myplot)
}
}
what’s the max resolution we can achieve while keepign clusters stable?
plot.title <- paste0(projectName, "_clustree_ndim", max(cmp.object@commands$RunUMAP.RNA.pca$dims))
my.clustree <- clustree(cmp.object, prefix = "RNA_snn_res.", node_colour = "sc3_stability", exprs = "scale.data") +
scale_color_continuous(low = 'red3', high = 'white') +
ggtitle(plot.title)
png(filename = paste0(plot.title, ".png"), height = 800, width = 1600)
plot(my.clustree)
dev.off()
for each resolution, number/percentage of cells in each cluster?
tot.cells <- nrow(cmp.object@meta.data)
for (meta.col in colnames(cmp.object@meta.data)){
if(grepl(pattern = ("RNA_snn_res"), x = meta.col)==TRUE){
new.clusters <- sort(as.numeric(levels(cmp.object@meta.data[[meta.col]])))
stats.df <- data.frame(matrix(ncol = 2, nrow = length(new.clusters)))
colnames(stats.df) <- c("num_cells", "pct_pop")
rownames(stats.df) <- new.clusters
meta.df <- cmp.object@meta.data
for(row.id in rownames(stats.df)){
num.x <- nrow(meta.df[meta.df[meta.col] == row.id,])
pct.x <- as.integer(num.x / tot.cells *100)
# print(pct.x)
stats.df[row.id, "num_cells"] <- num.x
stats.df[row.id, "pct_pop"] <- pct.x
}
print(stats.df)
}
}
For each resolution, what percentage of cells in each cluster are enriched for one of our clust.IDs?
Test: what percentage of each new clusterID matches one of the older clusters?
for (meta.col in colnames(cmp.object@meta.data)){
if(grepl(pattern = ("RNA_snn_res"), x = meta.col)==TRUE){
new.clusters <- sort(as.numeric(levels(cmp.object@meta.data[[meta.col]])))
enrich.df <- data.frame(matrix(ncol = 4, nrow = length(new.clusters)))
colnames(enrich.df) <- c(3, 17, 10, 11)
rownames(enrich.df) <- new.clusters
meta.df <- cmp.object@meta.data
for(row.id in rownames(enrich.df)){
tot.clus <- nrow(meta.df[meta.df[[meta.col]] == row.id,])
for(col.id in colnames(enrich.df)){
num.x <- nrow(meta.df[(meta.df[[meta.col]] == row.id) & (meta.df$clust.ID == col.id),])
pct.x <- as.integer(num.x / tot.clus *100)
# print(pct.x)
enrich.df[row.id, col.id] <- pct.x
}
}
colnames(enrich.df) <- sapply(colnames(enrich.df), function(x) paste0("oldcluster", x))
rownames(enrich.df) <- sapply(rownames(enrich.df), function(x) paste0("newcluster", x))
xlsx::write.xlsx(enrich.df, file = paste0("PctOfNewClustersOverlappingOldClusters_", projectName, "_dim", ndims, ".xlsx"), sheetName = paste0(gsub("RNA_snn_", "", meta.col)), append = TRUE)
print(enrich.df)
}
}
Absolutely terrible overlap, no enrichment of any of these across the new clustering algorithm. Maybe should try 95% variation covered
time for the super scarey moment to see if the cells from seuratv1 still cluster together on in seurat v4
DimPlot(cmp.object,
reduction = "umap",
group.by = "clust.ID",
# split.by = "orig.ident",
cols = c("gray", "orange", "blue", "red", "green"),)
DimPlot(cmp.object,
reduction = "umap",
group.by = "orig.ident",
split.by = "clust.ID",
cols = c("gray", "orange", "blue", "red", "green"),)
set total.var <- 95%
tot.var <- percent.variance(cmp.object@reductions$pca@stdev, plot.var = FALSE, return.val = TRUE)
ndims <- length(which(cumsum(tot.var) <= 95))
cmp.object <- FindNeighbors(cmp.object, dims = 1:ndims)
cmp.object <- FindClusters(cmp.object, resolution = 0.5)
cmp.object <- RunUMAP(cmp.object, dims = 1: ndims)
Plot UMAP
for(x in c(0.5, 1, 1.5, 2, 2.5)){
cmp.object <- FindClusters(cmp.object, resolution = x)
}
saveRDS(cmp.object, file = paste0(projectName, "_dim", ndims, ".RDS"))
cmp.object <- readRDS("CMP_dim41.RDS")
plot.title <- paste0(projectName, "_clustree_ndim", max(cmp.object@commands$RunUMAP.RNA.pca$dims))
my.clustree <- clustree(cmp.object, prefix = "RNA_snn_res.", node_colour = "sc3_stability", exprs = "scale.data") +
scale_color_continuous(low = 'red3', high = 'white') +
ggtitle(plot.title)
png(filename = paste0(plot.title, ".png"), height = 800, width = 1600)
plot(my.clustree)
dev.off()
For each resolution, what percentage of cells in each cluster are enriched for one of our clust.IDs?
Test: what percentage of each new clusterID matches one of the older clusters?
for (meta.col in colnames(cmp.object@meta.data)){
if(grepl(pattern = ("RNA_snn_res"), x = meta.col)==TRUE){
new.clusters <- sort(as.numeric(levels(cmp.object@meta.data[[meta.col]])))
enrich.df <- data.frame(matrix(ncol = 4, nrow = length(new.clusters)))
colnames(enrich.df) <- c(3, 17, 10, 11)
rownames(enrich.df) <- new.clusters
meta.df <- cmp.object@meta.data
for(row.id in rownames(enrich.df)){
tot.clus <- nrow(meta.df[meta.df[[meta.col]] == row.id,])
for(col.id in colnames(enrich.df)){
num.x <- nrow(meta.df[(meta.df[[meta.col]] == row.id) & (meta.df$clust.ID == col.id),])
pct.x <- as.integer(num.x / tot.clus *100)
# print(pct.x)
enrich.df[row.id, col.id] <- pct.x
}
}
colnames(enrich.df) <- sapply(colnames(enrich.df), function(x) paste0("oldcluster", x))
rownames(enrich.df) <- sapply(rownames(enrich.df), function(x) paste0("newcluster", x))
xlsx::write.xlsx(enrich.df, file = paste0("PctOfNewClustersOverlappingOldClusters_", projectName, "_dim", ndims, ".xlsx"), sheetName = paste0(gsub("RNA_snn_", "", meta.col)), append = TRUE)
print(enrich.df)
}
}
Absolutely terrible overlap, no enrichment of any of these across the new clustering algorithm. Maybe should try 95% variation covered
time for the super scarey moment to see if the cells from seuratv1 still cluster together on in seurat v4
DimPlot(cmp.object,
reduction = "umap",
group.by = "clust.ID",
pt.size = .1,
# split.by = "orig.ident",
cols = c("gray", "orange", "blue", "red", "green"),)
DimPlot(cmp.object,
reduction = "umap",
group.by = "orig.ident",
split.by = "clust.ID",
cols = c("gray", "orange", "blue", "red", "green"),)
Let’s see if we can get some gene expression profiles on these…
gene.list <- c("Gata1", "Gata2", "Pf4", "Dntt", "Mpo", "Meis1", "Irf8", "Elane", "Fli1", "Zfpm1")
VlnPlot(cmp.object, features = gene.list, group.by = "clust.ID", pt.size = 0.01, cols = c("gray", "orange", "blue", "red", "green"))
Used the exce doc to do some fancy conditional formatting. Old cluster 17 is pretty dispersed until you it resolution 2.5. Otherise, cells in old cluster 17 do not constitute more than 40% of any cells in the new clusters.
As far as I can see, the two approaches are to do DGEof new CMP w/ resolution = 2.5, AND/OR do DGe using older cluster IDs. Sure seems to make sense to do both…
Strt with comparing all clusters against all other clusters Write out cluster info
calculate FindAllMarkers() for different idents and save to new file
ident.list <- c("RNA_snn_res.0.5", "RNA_snn_res.1", "RNA_snn_res.1.5", "RNA_snn_res.2", "RNA_snn_res.2.5", "clust.ID")
for(tested.ident in ident.list){
Idents(cmp.object) <- tested.ident
all.markers <- FindAllMarkers(cmp.object)
xlsx::write.xlsx(x = all.markers[,c("avg_log2FC", "p_val_adj", "cluster", "gene")],
file = paste0(projectName, "_FindALLMarkers_res2.5.xlsx"),
sheetName = tested.ident,
col.names = TRUE,
row.names = FALSE,
append = TRUE)
}
Create FindAllMarkers() lists for GSEA
Idents(cmp.object) <- "RNA_snn_res.2.5"
res.2.5.allmarkers <- FindAllMarkers(cmp.object)
Mouse2HumanTable <- Mouse2Human(res.2.5.allmarkers$gene)
HGNC <- with(Mouse2HumanTable, Mouse2HumanTable$HGNC[match(res.2.5.allmarkers$gene, Mouse2HumanTable$MGI)])
head(res.2.5.allmarkers)
res.2.5.allmarkers$HGNC <- HGNC
tail(res.2.5.allmarkers)
sig.res.2.5 <- res.2.5.allmarkers[res.2.5.allmarkers$p_val_adj <= 0.05, ]
sig.res.2.5 <- sig.res.2.5[c("avg_log2FC", "HGNC", "cluster")]
sig.res.2.5 <- sig.res.2.5[!(sig.res.2.5$HGNC == "" | is.na(sig.res.2.5$HGNC)),] # GSEA will fail if there are any blanks or NAs in the table
sig.res.2.5 <- sig.res.2.5[]
for(cluster in unique(sig.res.2.5$cluster)){
print(paste("writing cluster", cluster))
new.table <- sig.res.2.5[sig.res.2.5$cluster == cluster, c("HGNC", "avg_log2FC")]
new.table <- new.table[order(-new.table$avg_log2FC), ]
write.table(new.table, file = paste0("RankList_res2.5_findAll_hgnc/res.2.5cluster", cluster, ".rnk"), quote = FALSE, row.names = FALSE, col.names = TRUE, sep = "\t", )
}
calculate FindMarkers() that distinguish each cluster (might overlab between clusters)
ident.list <- c("RNA_snn_res.0.5", "RNA_snn_res.1", "RNA_snn_res.1.5", "RNA_snn_res.2", "RNA_snn_res.2.5", "clust.ID")
for(tested.ident in ident.list){
object.copy <- cmp.object
Idents(object.copy) <- tested.ident
print(paste("testing", tested.ident))
for (cluster in sort(as.numeric(levels(object.copy@meta.data[[tested.ident]])))){
print(paste("looking at cluster", cluster))
cluster.markers <- FindMarkers(object.copy, ident.1 = cluster)
try(
xlsx::write.xlsx(x = cluster.markers[,c("avg_log2FC", "p_val_adj")],
file = paste0(projectName, "_FindMarkers_", gsub("RNA_snn_", "", tested.ident), ".xlsx"),
sheetName = paste0("clst", cluster),
col.names = TRUE,
row.names = TRUE,
append = TRUE)
)
}
remove(object.copy)
}
for (cluster in sort(as.numeric(levels(cmp.object@meta.data$RNA_snn_res.2.5)))){
cluster.markers <- FindMarkers(cmp.object, ident.1 = cluster)
xlsx::write.xlsx(x = cluster.markers[,c("avg_log2FC", "p_val_adj")],
file = paste0(projectName, "_FindMarkers_res2.5.xlsx"),
sheetName = paste0("clst", cluster),
col.names = TRUE,
row.names = TRUE,
append = TRUE)
}
reset ident as “clust.ID” and rerun FindAllMarkers()
Idents(cmp.object) <- "clust.ID"
all.markers <- FindAllMarkers(cmp.object)
xlsx::write.xlsx(x = all.markers[,c("avg_log2FC", "p_val_adj", "cluster", "gene")],
file = paste0(projectName, "_FindALLMarkers_clustID.xlsx"),
sheetName = "clustID",
col.names = TRUE,
row.names = FALSE,
append = TRUE)
# Idents(cmp.object) <- "clust.ID"
for (cluster in unique(cmp.object@meta.data$clust.ID)){
print(cluster)
cluster.markers <- FindMarkers(cmp.object, ident.1 = cluster)
xlsx::write.xlsx(x = cluster.markers[,c("avg_log2FC", "p_val_adj")],
file = paste0(projectName, "_FindMarkers_clustID.xlsx"),
sheetName = paste0("oldclust", cluster),
col.names = TRUE,
row.names = TRUE,
append = TRUE)
}
Previously defined biomark genes based on PC contributions. Original list was based on all msAggr, but let’s see how CMP subset does?
VizDimLoadings(cmp.object, dims = 1:10, nfeatures = 30, reduction = "pca", ncol = 2)
pca.df <- cmp.object[["pca"]]
pca.df <- as.data.frame(as.matrix(slot(object = pca.df, name = "feature.loadings")))
print(cmp.object[["pca"]], dims = 2, nfeatures = 5)
rownames(pca.df[pca.df$PC_2 %in% sort(pca.df$PC_2, decreasing = TRUE)[1:5], ])
rownames(pca.df[pca.df$PC_2 %in% sort(pca.df$PC_2)[1:5], ])
now we can get a list of principal components!
first pull the list of oldAnalysis CMP top PC genes
cmp.biomark <- read.table(file = "/Users/heustonef/Desktop/CMPSubpops/BioMark/ProbePanels/CMP_PCTopGenes.txt", sep = "\t", header = TRUE)
biomark.cmptargets <- c()
for(df.col in 1:ncol(cmp.biomark)){
biomark.cmptargets <- c(biomark.cmptargets, biomark[,df.col])
}
print(colnames(biomark))
print(paste("total gene count:", length(biomark.cmptargets)))
Now get the list of current pc gene trgets (oldAnalysis used ndim = 1:6, so we’ll start with that range)
pc.list <- c("PC_1", "PC_2", "PC_3", "PC_4", "PC_5", "PC_6")
pc.genes <- lapply(pc.list, function(x) rownames(pca.df[pca.df[[x]] %in% sort(pca.df[[x]], decreasing = TRUE)[1:30],])) #targeting roughly 180 genes like in biomark.cmptargets
pc.genes <- unique(unlist(pc.genes))
print(paste("total gene count:", length(pc.genes)))
Now compare the lists, I guess:
# setdiff(x,y) gives you things in x not in y. setdiff(y,x) gives you things in y not in x
setdiff(biomark.cmptargets, pc.genes)
# print(paste("\n length:", length(setdiff(biomark.cmptargets, pc.genes))))
writeLines(c("", "length:", length(setdiff(biomark.cmptargets, pc.genes))))
Umm, yeah that went kinda how I expected. Let’s do this again, but for the actual biomark gene lists.
biomark <- read.table(file = "/Users/heustonef/Desktop/CMPSubpops/BioMark/ProbePanels/BiomarkProbeList.txt", sep = "\t")
biomark <- biomark[,1]
setdiff(biomark, pc.genes)
writeLines(c("", "length:", length(setdiff(biomark, pc.genes))))
What if we increase the number of pcs but decrease the depth of each? This might cover more of biomark, which was originally developed using msAggr instead of only the CMP subset
pc.list <- c("PC_1", "PC_2", "PC_3", "PC_4", "PC_5", "PC_6", "PC_7", "PC_8", "PC_9", "PC_10")
pc.genes <- lapply(pc.list, function(x) rownames(pca.df[pca.df[[x]] %in% sort(pca.df[[x]], decreasing = TRUE)[1:20],]))
pc.genes <- unique(unlist(pc.genes))
print(paste("total gene count:", length(pc.genes)))
setdiff(biomark, pc.genes)
writeLines(c("", "length:", length(setdiff(biomark, pc.genes))))
For comparison, let’s just see how many of biomark.cmptargets were actually included in biomark
setdiff(biomark.cmptargets, biomark)
writeLines(c("", "length:", length(setdiff(biomark.cmptargets, pc.genes))))
length(biomark) - length(setdiff(biomark, biomark.cmptargets))
length(biomark) - length(setdiff(biomark, pc.genes))
So when you look at it like that, it’s not actually that far off.
What are the similarities?:
setdiff(setdiff(biomark, biomark.cmptargets), setdiff(biomark, pc.genes))
These are genes from the 97probes not in the old CMP set that are also not in the new CMP set. Other than Itga2b (which is a failed probe anyway), nothing screams. Also we’d have thrown Flt3 and Cd34 for in anyway because they’re requisite cell surface markers (also Flt3 surface marker is expensive but otherwise not noteworthy and not used in the current sorting strategy)
What about cell surface marker expression? * Cd34 * Cd16/32 * Cd9 * Cd41 * Cd48 * Sca1 (just throw that in for sh*&s and giggles)
surface.markers <- c("Cd34", "Fcgr3", "Fcgr2b", "Cd9", "Itga2b", "Cd48", "Ly6a")
FeaturePlot(cmp.object, features = surface.markers, pt.size = 1, split.by = "clust.ID", ncol = 1)
Save as png
png(filename = "FeaturePlot_CMP_surfaceMarkers_clustIDfacet.png", height = 1600, width = 1600)
FeaturePlot(cmp.object, features = surface.markers, pt.size = 1, split.by = "clust.ID", ncol = 1)
dev.off()
Do clustering using biomark RNAs as input
# Read in BiomarkRNAs
biomark.rnas <- read.table('/Users/heustonef/Desktop/10XGenomicsData/BiomarkRNAs.txt')
biomark.rnas <- biomark.rnas$V1
use biomark RNAs to define dimensional reduction
cmp.object <- readRDS("CMP_raw.RDS")
cmp.object <- RunPCA(cmp.object, features = biomark.rnas, ndims.print = 1:5, , nfeatures.print = 5)
ElbowPlot(cmp.object, ndims = 50)
Now run the clustering
tot.var <- percent.variance(cmp.object@reductions$pca@stdev, plot.var = FALSE, return.val = TRUE)
ndims <- length(which(cumsum(tot.var) <= 90))
cmp.object <- FindNeighbors(cmp.object, dims = 1:ndims)
cmp.object <- FindClusters(cmp.object, resolution = 0.5)
cmp.object <- RunUMAP(cmp.object, dims = 1: ndims)
find the clusters
for(x in c(0.5, 1, 1.5, 2, 2.5)){
cmp.object <- FindClusters(cmp.object, resolution = x)
}
Plot the umaps and cell cluster ids
for (meta.col in colnames(cmp.object@meta.data)){
if(grepl(pattern = ("RNA_snn_res"), x = meta.col)==TRUE){
myplot <- DimPlot(cmp.object,
group.by = meta.col,
reduction = "umap",
cols = color.palette
) +
ggtitle(paste0(projectName, " dim", ndims, "res", gsub("RNA_snn_res", "", meta.col) ))
plot(myplot)
}
}
Get # cells in each cluster
tot.cellcount <- nrow(cmp.object@meta.data)
res05.list <- sort(unique(cmp.object@meta.data$RNA_snn_res.0.5), decreasing = FALSE)
sapply(res05.list,
function(x){
print(
paste(
"cluster", x, "=",
nrow(cmp.object@meta.data[cmp.object@meta.data$RNA_snn_res.0.5 == x,]),
"cells or",
round(nrow(cmp.object@meta.data[cmp.object@meta.data$RNA_snn_res.0.5 == x,])/tot.cellcount*100, digits = 2),
"% of total"
)
)
}
)
So we did the dimensional reduction based on the biomark RNAs, then did our UMAP nearest neighbor clustering.
In the biomark hierarchcial clustering analysis I assayed 167 cells. The smallest cluster I detected had 3 cells, or 1.8% of total, and this is an uncomfortably small number of cells. Based on the UMAP calculations I would therefore expect to find 11 or 12 of the predicted 15 clusters. I found 12, and I don’t really like that last one, so 11 or 12. Since I did the hierarchcial clustering yesterday and did this math today, we can say it was independent of these results and therefore totally legit. Yay!!